# Replication package for 
# "International Third Parties and the Implementation of 
# Comprehensive Peace Agreements After Civil War"
# Johannes Karreth, Jason Quinn, Madhav Joshi, and Jaroslav Tir
# Forthcoming in the Journal of Conflict Resolution
# jkarreth@ursinus.edu

# Please read "0_readme.html" before using this code.

# This file is `1_compile_data.R`
# It compiles and cleans the data used in the analysis.

####################################
### Set up data and packages
####################################

# Set your working directory to the replication package

# setwd("...")
setwd(dirname(rstudioapi::getSourceEditorContext()$path))

library("tidyverse")

# PAM data ----------------------------------------------------------------

pam <- rio::import("Data/Source/PAM/PAM_ID V.1.5 Updated 29JULY2015.xlsx")

pam <- arrange(pam, pam_caseid, year_count)
pam <- dplyr::mutate(group_by(pam, pam_caseid), 
                     agg_implem_score_y1 = agg_implem_score[1],
                     agg_implem_score_y2 = agg_implem_score[2],
                     agg_implem_score_y3 = agg_implem_score[3],
                     agg_implem_score_y4 = agg_implem_score[4],
                     agg_implem_score_y5 = agg_implem_score[5],
                     agg_implem_score_lag = lag(agg_implem_score))

names(pam) <- paste("pam_", names(pam), sep = "")
names(pam)[1] <- "pam_caseid"
names(pam)[6] <- "pam_accord_name"
pam[pam$pam_caseid == 25, ]$pam_cease_date <- as.POSIXct("1990-10-13") # Lebanon, ep_end_date via UCDP/PRIO
pam[pam$pam_caseid == 27, ]$pam_cease_date <- as.POSIXct("1994-07-24") # Bodo, ep_end_date via UCDP/PRIO
pam$pam_warlength_days <- pam$pam_cease_date - pam$pam_war_start

# add info on leadership change from Archigos -----------------------------

pam$pam_newLeaderSinceCPA <- 0

# Guatemala: change on 1/15/00
pam[pam$pam_caseid == 1 & pam$pam_year >= 2000, ]$pam_newLeaderSinceCPA <- 1

# El Salvador: changes on 6/1/94 and 6/1/99
pam[pam$pam_caseid == 2 & pam$pam_year >= 1994, ]$pam_newLeaderSinceCPA <- 1

# Macedonia: changes on 11/1/02 and 5/12/04 and 6/2/04 and 11/18/04 and 12/17/04 and 8/27/06
pam[pam$pam_caseid == 4 & pam$pam_year >= 2002, ]$pam_newLeaderSinceCPA <- 1

# Croatia: changes on 11/26/99 and 2/2/00 and 2/18/00
pam[pam$pam_caseid == 5 & pam$pam_year >= 2000, ]$pam_newLeaderSinceCPA <- 1

# Bosnia: changes on 10/13/98 and thereafter
pam[pam$pam_caseid == 6 & pam$pam_year >= 1999, ]$pam_newLeaderSinceCPA <- 1

# Guinea-Bissau: change on 5/7/99 and thereafter
pam[pam$pam_caseid == 7 & pam$pam_year >= 2000, ]$pam_newLeaderSinceCPA <- 1

# Mali: change on 6/6/92
pam[pam$pam_caseid == 8 & pam$pam_year >= 1992, ]$pam_newLeaderSinceCPA <- 1

# Senegal: change on 4/2/12
pam[pam$pam_caseid == 9 & pam$pam_year >= 2012, ]$pam_newLeaderSinceCPA <- 1

# Niger: changes on 1/27/96 and thereafter
pam[pam$pam_caseid == 10 & pam$pam_year >= 1996, ]$pam_newLeaderSinceCPA <- 1

# Ivory Coast: change on 4/11/11
pam[pam$pam_caseid == 11 & pam$pam_year >= 2011, ]$pam_newLeaderSinceCPA <- 1

# Liberia: change on 10/14/03 and 1/16/06 (EJS)
pam[pam$pam_caseid == 12 & pam$pam_year >= 2004, ]$pam_newLeaderSinceCPA <- 1

# Sierra Leone 1: change on 3/10/98
pam[pam$pam_caseid == 13 & pam$pam_year >= 1998, ]$pam_newLeaderSinceCPA <- 1

# Sierra Leone 2: change on 9/17/07
pam[pam$pam_caseid == 14 & pam$pam_year >= 2008, ]$pam_newLeaderSinceCPA <- 1

# Burundi: changes on 4/30/03 and 8/26/05
pam[pam$pam_caseid == 16 & pam$pam_year >= 2003, ]$pam_newLeaderSinceCPA <- 1

# Rwanda: change on 7/19/94
pam[pam$pam_caseid == 17 & pam$pam_year >= 1994, ]$pam_newLeaderSinceCPA <- 1

# Djibouti: change on 5/8/99
pam[pam$pam_caseid == 18 & pam$pam_year >= 1999, ]$pam_newLeaderSinceCPA <- 1

# South Africa: no change coded b/c ANC

# India: new leaders in 1996, 1997, 1998
pam[pam$pam_caseid == 27 & pam$pam_year >= 1996, ]$pam_newLeaderSinceCPA <- 1

# Bangladesh: change in 7/15/01 and thereafter
pam[pam$pam_caseid == 28 & pam$pam_year >= 2001, ]$pam_newLeaderSinceCPA <- 1

# Nepal: changes on 8/18/08 and thereafter
pam[pam$pam_caseid == 29 & pam$pam_year >= 2001, ]$pam_newLeaderSinceCPA <- 1

# Cambodia: change on 9/21/93
pam[pam$pam_caseid == 30 & pam$pam_year >= 1994, ]$pam_newLeaderSinceCPA <- 1

# Philippines: change on 6/30/98
pam[pam$pam_caseid == 31 & pam$pam_year >= 1998, ]$pam_newLeaderSinceCPA <- 1

# Papua New Guinea: change on 8/6/02
pam[pam$pam_caseid == 34 & pam$pam_year >= 2003, ]$pam_newLeaderSinceCPA <- 1

# supplement with info from CHISOLS, which ends in 2008. 
# so the variable below is the same as above, but codes as 0 for leaders with SAME support coalition (pre-2008)

pam$pam_newSolsSinceCPA <- 0

# Guatemala: change on 1/15/00, also SOLS
pam[pam$pam_caseid == 1 & pam$pam_year >= 2000, ]$pam_newSolsSinceCPA <- 1

# El Salvador: changes on 6/1/94 and 6/1/99, but not SOLS
# pam[pam$pam_caseid == 2 & pam$pam_year >= 1994, ]$pam_newSolsSinceCPA <- 0

# Macedonia: changes on 11/1/02 (also SOLS) 
pam[pam$pam_caseid == 4 & pam$pam_year >= 2002, ]$pam_newSolsSinceCPA <- 1

# Croatia: change in 2000, also SOLS
pam[pam$pam_caseid == 5 & pam$pam_year >= 2000, ]$pam_newSolsSinceCPA <- 1

# Bosnia: SOLS only in 2006
pam[pam$pam_caseid == 6 & pam$pam_year >= 2006, ]$pam_newSolsSinceCPA <- 1

# Guinea-Bissau: change in 2000
pam[pam$pam_caseid == 7 & pam$pam_year >= 2000, ]$pam_newSolsSinceCPA <- 1

# Mali: change in 1992
pam[pam$pam_caseid == 8 & pam$pam_year >= 1992, ]$pam_newSolsSinceCPA <- 1

# Senegal: change on 4/2/12, but not in CHISOLS
# pam[pam$pam_caseid == 9 & pam$pam_year >= 2012, ]$pam_newSolsSinceCPA <- 1

# Niger: changes in 1996 and thereafter
pam[pam$pam_caseid == 10 & pam$pam_year >= 1996, ]$pam_newSolsSinceCPA <- 1

# Ivory Coast: change on 4/11/11, not in CHISOLS, but definitely SOLS
pam[pam$pam_caseid == 11 & pam$pam_year >= 2011, ]$pam_newSolsSinceCPA <- 1

# Liberia: change on 10/14/03 and 1/16/06 (EJS), also SOLS
pam[pam$pam_caseid == 12 & pam$pam_year >= 2004, ]$pam_newSolsSinceCPA <- 1

# Sierra Leone 1: change on 3/10/98, also SOLS
pam[pam$pam_caseid == 13 & pam$pam_year >= 1998, ]$pam_newSolsSinceCPA <- 1

# Sierra Leone 2: change on 9/17/07, also SOLS
pam[pam$pam_caseid == 14 & pam$pam_year >= 2008, ]$pam_newSolsSinceCPA <- 1

# Burundi: changes on 4/30/03 and 8/26/05, also SOLS
pam[pam$pam_caseid == 16 & pam$pam_year >= 2003, ]$pam_newSolsSinceCPA <- 1

# Rwanda: change on 7/19/94, also SOLS
pam[pam$pam_caseid == 17 & pam$pam_year >= 1994, ]$pam_newSolsSinceCPA <- 1

# Djibouti: change on 5/8/99, not SOLS
# pam[pam$pam_caseid == 18 & pam$pam_year >= 1999, ]$pam_newSolsSinceCPA <- 1

# South Africa: no change coded b/c ANC

# India: new leaders in 1996,  also SOLS
pam[pam$pam_caseid == 27 & pam$pam_year >= 1996, ]$pam_newSolsSinceCPA <- 1

# Bangladesh: change in 7/15/01, also SOLS
pam[pam$pam_caseid == 28 & pam$pam_year >= 2001, ]$pam_newSolsSinceCPA <- 1

# Nepal: changes on 8/18/08, also SOLS
pam[pam$pam_caseid == 29 & pam$pam_year >= 2001, ]$pam_newSolsSinceCPA <- 1

# Cambodia: change on 9/21/93, but not SOLS
# pam[pam$pam_caseid == 30 & pam$pam_year >= 1994, ]$pam_newSolsSinceCPA <- 1

# Philippines: change on 6/30/98, also SOLS
pam[pam$pam_caseid == 31 & pam$pam_year >= 1998, ]$pam_newSolsSinceCPA <- 1

# Papua New Guinea: change on 8/6/02, also SOLS
pam[pam$pam_caseid == 34 & pam$pam_year >= 2003, ]$pam_newSolsSinceCPA <- 1

pam$pam_cease_year <- lubridate::year(pam$pam_cease_date)
pam$pam_start_year <- lubridate::year(pam$pam_war_start)


# add conflict data (mediation etc.) --------------------------------------

conflicts <- readRDS("Data/Source/HSIGO-CPA_c.rds")
conflicts <- dplyr::select(conflicts, cowcode, Year, pam_caseid, ID_ucdp_year, ID_ucdp, 
                    ID_ucdp_pamyears, ucdp_conflictID, ucdp_Location, ucdp_SideA, 
                    ucdp_SideA2nd, ucdp_SideB, ucdp_SideBID, ucdp_SideB2nd, 
                    ucdp_Incompatibility_code, ucdp_TerritoryName, ucdp_IntensityLevel, 
                    ucdp_CumulativeIntensity, ucdp_TypeOfConflict, ucdp_StartDate, 
                    ucdp_StartPrec, ucdp_StartDate2, ucdp_StartPrec2, ucdp_EpEndDate, 
                    ucdp_EpEnd, pam_daystopa, pam_padate, conflictyears, conflictyears_1989, 
                    ucdp_IntensityLevel_lag, mediation_cumsum,  mediation_cumsum_log, mediation_cumsum_lag, 
                    mediation_cumsum_log_lag, cwm_yes_cumsum_lag, mic_yes_cumsum_lag, UNPKO_yes_lag, ext_int_mapo, ext_int_mapo_any)
conflicts$UNPKO_yes_preCPA <- conflicts$UNPKO_yes_lag
conflicts$UNPKO_yes_lag <- NULL

d1 <- left_join(x = pam, y = conflicts, by = c("pam_caseid" = "pam_caseid"))

##################################
### Merging other data sources ###
##################################

# various controls ------------------------------------------------------------

# using sourcex_data_RR.csv for country-specific aid volume

sourcex <- rio::import("Data/Source/sourcex_data_RR.csv")

d2a <- left_join(x = d1, y = sourcex,
                by = c("pam_cowcode" = "ccode", "pam_year" = "year"))

# separate file for GDP prior to conflict 

sourcex_prec <- rio::import("Data/Source/precGDP.csv")
d2a$pam_prestart_yr <- d2a$pam_start_year - 1
d2a$pam_prestart_yr1980 <- ifelse(d2a$pam_prestart_yr < 1980, 1980, d2a$pam_prestart_yr)

d2 <- left_join(x = d2a, y = sourcex_prec,
                by = c("pam_cowcode" = "ccode", "pam_prestart_yr1980" = "prec_year"))

# Peacekeeping ------------------------------------------------------------

# Global, from http://www.providingforpeacekeeping.org/contributions/
pk2 <- rio::import("Data/Source/UNPK/PfP/data_mission.csv")
names(pk2) <- gsub(pattern = "[^[:alnum:]]", replacement = "", x = names(pk2))
pk2$Date <- as.Date(pk2$Date)
pk2$year <- as.numeric(format(pk2$Date, "%Y"))
table(pk2$CountryofMission)

pky <- dplyr::summarize(dplyr::group_by(pk2, CountryofMissionISO3, year),
                        UNPKO_Troops = median(Troops, na.rm = TRUE),
                        UNPKO_Police = median(Police, na.rm = TRUE),
                        UNPKO_Observers = median(Observers, na.rm = TRUE),
                        UNPKO_Personnel = median(Total, na.rm = TRUE))
pky$UNPKO_yes <- 1
pky$cowcode <- countrycode::countrycode(sourcevar = pky$CountryofMissionISO3,
                                        origin = "iso3c", 
                                        destination = "cown")
pky[pky$CountryofMissionISO3 == "ESH", ]$cowcode <- 600 # Western Sahara, but formally Morocco
pky[pky$CountryofMissionISO3 == "SRB", ]$cowcode <- 345  # Serbia

# Lags
pky <- arrange(pky, CountryofMissionISO3, year)
pky2 <- mutate(group_by(pky, CountryofMissionISO3),
               UNPKO_Troops_lag = lag(UNPKO_Troops, n = 1),
               UNPKO_Police_lag = lag(UNPKO_Police, n = 1),
               UNPKO_Observers_lag = lag(UNPKO_Observers, n = 1),
               UNPKO_Personnel_lag = lag(UNPKO_Personnel, n = 1),
               UNPKO_yes_lag = lag(UNPKO_yes, n = 1))

d3 <- left_join(x = d2, y = pky2,
                     by = c("pam_cowcode" = "cowcode", "pam_year" = "year"))

# Individual cases that were not in the source data
# now taken from http://www.un.org/en/peacekeeping/documents/operationslist.pdf

d3[d3$pam_cowcode == 660 & d3$pam_year >= 1978, ]$UNPKO_yes <- 1
d3[d3$pam_cowcode == 660 & d3$pam_year >= 1979, ]$UNPKO_yes_lag <- 1

# Angola 1989-1991 (withdrawal of Cuban troops)
d3[d3$pam_cowcode == 540 & d3$pam_year >= 1989 & d3$pam_year <= 1991, ]$UNPKO_yes <- 1
d3[d3$pam_cowcode == 540 & d3$pam_year >= 1990 & d3$pam_year <= 1992, ]$UNPKO_yes_lag <- 1


# Costa Rica, El Salvador, Guatemala, Honduras and Nicaragua 1989-1992
d3[d3$pam_cowcode == 92 & d3$pam_year >= 1989 & d3$pam_year <= 1992, ]$UNPKO_yes <- 1
d3[d3$pam_cowcode == 90 & d3$pam_year >= 1989 & d3$pam_year <= 1992, ]$UNPKO_yes <- 1
d3[d3$pam_cowcode == 92 & d3$pam_year >= 1990 & d3$pam_year <= 1993, ]$UNPKO_yes_lag <- 1
d3[d3$pam_cowcode == 90 & d3$pam_year >= 1990 & d3$pam_year <= 1993, ]$UNPKO_yes_lag <- 1

# Kathman peacekeeping data, from https://kathmanundata.weebly.com/mission-personnel-dataset.html --------------------------------------

kath <- rio::import("Data/Source/UNPK/Kathman/mission-month_12-2019.xlsx")

kath$troop <- ifelse(kath$troop < 0, NA, kath$troop)
kath$police <- ifelse(kath$police < 0, NA, kath$troop)
kath$total <- ifelse(kath$total < 0, NA, kath$troop)

kath_my <- summarize(group_by(filter(kath, missionccode > 0), mission, year),
                     kath_troop = median(troop, na.rm = TRUE),
                     kath_police = median(police, na.rm = TRUE),
                     kath_total = median(total, na.rm = TRUE),
                     missionccode = first(missionccode))

kath_cy <- summarize(group_by(kath_my, missionccode, year),
                     kath_troop = sum(kath_troop, na.rm = TRUE),
                     kath_police = sum(kath_police, na.rm = TRUE),
                     kath_total = sum(kath_total, na.rm = TRUE))

# Lags

kath_cy2 <- mutate(group_by(kath_cy, missionccode),
                   kath_troop_lag = lag(kath_troop, n = 1),
                   kath_police_lag = lag(kath_police, n = 1),
                   kath_total_lag = lag(kath_total, n = 1))

# merge, then make no-PKO years 0

d3b <- left_join(x = d3, y = kath_cy2,
                 by = c("pam_cowcode" = "missionccode", "pam_year" = "year"))

d3b$UNPKO_Troops_lag_NA <- ifelse(is.na(d3b$UNPKO_Troops_lag) == TRUE, 0, d3b$UNPKO_Troops_lag)
d3b$UNPKO_Police_lag_NA <- ifelse(is.na(d3b$UNPKO_Police_lag) == TRUE, 0, d3b$UNPKO_Police_lag)
d3b$UNPKO_Personnel_lag_NA <- ifelse(is.na(d3b$UNPKO_Personnel_lag) == TRUE, 0, d3b$UNPKO_Personnel_lag)
d3b$kath_police_lag_NA <- ifelse(is.na(d3b$kath_police_lag) == TRUE, 0, d3b$kath_police_lag)
d3b$kath_troop_lag_NA <- ifelse(is.na(d3b$kath_troop_lag) == TRUE, 0, d3b$kath_troop_lag)
d3b$kath_total_lag_NA <- ifelse(is.na(d3b$kath_total_lag) == TRUE, 0, d3b$kath_total_lag)

# Other macro data --------------------------------------------------------

# GDP, Democracy, etc.

library("bit64")
qog <- rio::import("Data/Source/QoG_data_vJan18/qog_std_ts_jan18.csv")

qog_small <- qog[qog$year >= 1988, c("cname", "ccode", "ccodecow", "year",
                                     # "aid_crnc", "aid_crnio", "aid_crsc", "aid_crsio", 
                                     "al_ethnic",
                                     # "al_language", "al_religion", "chga_demo",
                                     # "ciri_physint", 
                                     "fe_etfra",
                                     # "gle_cgdpc", "gle_pop",
                                     # "gle_rgdpc", "gle_trade", 
                                     "h_j", "h_polcon3", "h_polcon5", "ht_colonial",
                                     # "imf_gdpgr",
                                     # "ht_regtype1", "p_polity2", "r_elf85",
                                     "ross_oil_netexp", "ucdp_type2", 
                                     # "uds_mean", "vdem_polyarchy",
                                     "wdi_gdpcapcon2010"
                                     # "wdi_gdpgr", "wdi_gdppppcur", "wdi_pop", "wdi_popden", "wdi_trade"
                                     )]

qog_small$cowcode <- countrycode::countrycode(sourcevar = qog_small$ccode,
                                              origin = "iso3n",
                                              destination = "cown")
qog_small[qog_small$cname == "Sudan (-2011)" & qog_small$year <= 2011, ]$cowcode <- 625
qog_small <- qog_small[!(qog_small$cname == "Sudan (2012-)" & qog_small$year <= 2011), ]
qog_small[qog_small$cname == "Yemen" & qog_small$year >= 1990, ]$cowcode <- 678
qog_small[qog_small$cname == "Serbia" & qog_small$year >= 2006 ,]$cowcode <- 345
qog_small[qog_small$cname == "Serbia and Montenegro" & qog_small$year >= 1992 & qog_small$year <= 2005, ]$cowcode <- 345
qog_small[qog_small$cname == "Yugoslavia" & qog_small$year >= 1992 ,]$cowcode <- NA
qog_small <- qog_small[is.na(qog_small$cowcode) == FALSE, ]
qog_small$ccode <- NULL
qog_small$qog_year <- qog_small$year
qog_small$year <- NULL

qog_small$col_french <- ifelse(qog_small$ht_colonial == 6, 1, 0)
qog_small$col_brit <- ifelse(qog_small$ht_colonial == 5, 1, 0)

# Maximize coverage of macro variables
# Some of these will throw errors, if so these are implemented correctly further below.

# Ethnic fractionalization

# qog_small[is.na(qog_small$al_ethnic) == TRUE | is.na(qog_small$fe_etfra) == TRUE, c("cowcode", "cname", "qog_year", "pam_caseid", "al_ethnic", "fe_etfra")]

# qog_small[qog_small$cowcode == 530, c("cowcode", "cname", "qog_year", "pam_caseid", "al_ethnic", "fe_etfra")]
qog_small[qog_small$cowcode == 530, ]$al_ethnic <- mean(qog_small[qog_small$cowcode == 530, ]$al_ethnic, na.rm = TRUE)
qog_small[qog_small$cowcode == 530, ]$fe_etfra <- mean(qog_small[qog_small$cowcode == 530, ]$fe_etfra, na.rm = TRUE)

# qog_small[qog_small$cowcode == 344, c("cowcode", "cname", "qog_year", "pam_caseid", "al_ethnic", "fe_etfra")]
qog_small[qog_small$cowcode == 344, ]$al_ethnic <- mean(qog_small[qog_small$cowcode == 344, ]$al_ethnic, na.rm = TRUE)
qog_small[qog_small$cowcode == 344, ]$fe_etfra <- mean(qog_small[qog_small$cowcode == 344, ]$fe_etfra, na.rm = TRUE)

# qog_small[qog_small$cowcode == 625, c("cowcode", "cname", "qog_year", "pam_caseid", "al_ethnic", "fe_etfra")]
qog_small[qog_small$cowcode == 625, ]$al_ethnic <- mean(qog_small[qog_small$cowcode == 625, ]$al_ethnic, na.rm = TRUE)
qog_small[qog_small$cowcode == 625, ]$fe_etfra <- mean(qog_small[qog_small$cowcode == 625, ]$fe_etfra, na.rm = TRUE)

qog_small$ethfrac <- rowMeans(qog_small[ , c("al_ethnic", "fe_etfra")], na.rm = TRUE)

qog_small <- qog_small[qog_small$qog_year >= 1989, ]

d4 <- left_join(x = d3b, y = qog_small,
                by = c("pam_cowcode" = "cowcode", "pam_year" = "qog_year"))

# Battle deaths -----------------------------------------------------------

bd <- rio::import("/Users/johanneskarreth/Documents/Dropbox/Uni/1 - Papers/HSIGOs and CPAs/Data/Source/UCDP-PRIO/ucdp-brd-conf-181.csv")
id_conv <- rio::import("/Users/johanneskarreth/Documents/Dropbox/Uni/1 - Papers/HSIGOs and CPAs/Data/Source/UCDP-PRIO/translate_conf.csv")

bd_new <- left_join(x = bd, y = id_conv, by = c("conflict_id" = "new_id"))
bd_new <- arrange(bd_new, conflict_id, year)

bd_new <- mutate(group_by(bd_new, conflict_id),
                 bdbest_cumsum = cumsum(bdbest))

bd_new$ucdp_conflictID_new <- bd_new$conflict_id
bd_new$ucdp_conflictID <- bd_new$old_id

bd_merge <- dplyr::select(bd_new, ucdp_conflictID_new, ucdp_conflictID, year, bdbest_cumsum)
bd_merge$ucdp_conflictID <- as.numeric(bd_merge$ucdp_conflictID)
bd_merge <- filter(bd_merge, !is.na(ucdp_conflictID))

# select conflicts from battle deaths data by hand!

bd_merge <- filter(bd_merge, 
                   (ucdp_conflictID_new == 327 & year == 1994) | 
                     (ucdp_conflictID_new == 327 & year == 2002) | 
                     (ucdp_conflictID_new == 322 & year == 1991) | 
                     (ucdp_conflictID_new == 389 & year == 1995) | 
                     (ucdp_conflictID_new == 287 & year == 2000) | 
                     (ucdp_conflictID_new == 300 & year == 1991) | 
                     (ucdp_conflictID_new == 408 & year == 1999) | 
                     (ucdp_conflictID_new == 390 & year == 1995) | 
                     (ucdp_conflictID_new == 379 & year == 1994) | 
                     (ucdp_conflictID_new == 379 & year == 1999) | 
                     (ucdp_conflictID_new == 316 & year == 1991) | 
                     (ucdp_conflictID_new == 233 & year == 1995) | 
                     (ucdp_conflictID_new == 410 & year == 1998) | 
                     (ucdp_conflictID_new == 421 & year == 1990) | 
                     (ucdp_conflictID_new == 366 & year == 2005) | 
                     (ucdp_conflictID_new == 419 & year == 2004) | 
                     (ucdp_conflictID_new == 260 & year == 1989) | 
                     (ucdp_conflictID_new == 341 & year == 2003) | 
                     (ucdp_conflictID_new == 417 & year == 2001) | 
                     (ucdp_conflictID_new == 372 & year == 1990) | 
                     (ucdp_conflictID_new == 332 & year == 1992) | 
                     (ucdp_conflictID_new == 269 & year == 2006) | 
                     (ucdp_conflictID_new == 406 & year == 1995) | 
                     (ucdp_conflictID_new == 315 & year == 1998) | 
                     (ucdp_conflictID_new == 369 & year == 1996) | 
                     (ucdp_conflictID_new == 308 & year == 1996) | 
                     (ucdp_conflictID_new == 374 & year == 1993) | 
                     (ucdp_conflictID_new == 375 & year == 2003) | 
                     (ucdp_conflictID_new == 382 & year == 1996) | 
                     (ucdp_conflictID_new == 382 & year == 1999) | 
                     (ucdp_conflictID_new == 309 & year == 2005) | 
                     (ucdp_conflictID_new == 395 & year == 1997) | 
                     (ucdp_conflictID_new == 330 & year == 1999))

bd_merge$ucdp_conflictID_CPA <- bd_merge$ucdp_conflictID
bd_merge[bd_merge$ucdp_conflictID_new == 327 & bd_merge$year == 1994, ]$ucdp_conflictID_CPA <- 131.1
bd_merge[bd_merge$ucdp_conflictID_new == 327 & bd_merge$year == 2002, ]$ucdp_conflictID_CPA <- 131.2

bd_merge[bd_merge$ucdp_conflictID_new == 379 & bd_merge$year == 1994, ]$ucdp_conflictID_CPA <- 184.1
bd_merge[bd_merge$ucdp_conflictID_new == 379 & bd_merge$year == 1999, ]$ucdp_conflictID_CPA <- 184.2

bd_merge[bd_merge$ucdp_conflictID_new == 382 & bd_merge$year == 1996, ]$ucdp_conflictID_CPA <- 187.1
bd_merge[bd_merge$ucdp_conflictID_new == 382 & bd_merge$year == 1999, ]$ucdp_conflictID_CPA <- 187.2

# because I skipped prior steps due to new sourcex
d5 <- d4

d5$ucdp_conflictID_CPA <- as.numeric(d5$ID_ucdp)
d5[d5$pam_accord_name == "Interim Constitution Accord, Nov 17 1993", ]$ucdp_conflictID_CPA <- 150
d5[d5$ucdp_conflictID_CPA == 131 & d5$pam_caseid == 20, ]$ucdp_conflictID_CPA <- 131.1
d5[d5$ucdp_conflictID_CPA == 131 & d5$pam_caseid == 21, ]$ucdp_conflictID_CPA <- 131.2

d5[d5$ucdp_conflictID_CPA == 184 & d5$pam_caseid == 18, ]$ucdp_conflictID_CPA <- 184.1
d5[d5$ucdp_conflictID_CPA == 184 & d5$pam_caseid == 19, ]$ucdp_conflictID_CPA <- 184.2

d5[d5$ucdp_conflictID_CPA == 187 & d5$pam_caseid == 13, ]$ucdp_conflictID_CPA <- 187.1
d5[d5$ucdp_conflictID_CPA == 187 & d5$pam_caseid == 14, ]$ucdp_conflictID_CPA <- 187.2

d6 <- left_join(x = d5, y = dplyr::select(bd_merge, ucdp_conflictID_new, ucdp_conflictID, ucdp_conflictID_CPA, bdbest_cumsum),
                     by = c("ucdp_conflictID_CPA" = "ucdp_conflictID_CPA"))

# Excluded population (Cederman et al / EPR); though some is also in sourcex ------------------------------

epr <- rio::import("/Users/johanneskarreth/Documents/Dropbox/Uni/1 - Papers/HSIGOs and CPAs/Data/Source/EPR/data_20190124.csv")
epr$cowcode <- epr$countries_gwid
epr$Year <- epr$year
epr <- epr[, c("cowcode", "Year", "exclpop", "lexclpop")]

# because I skipped prior steps due to new sourcex
d7 <- d6

d8 <- left_join(x = d7, y = epr,
                     by = c("pam_cowcode" = "cowcode", "pam_year" = "Year"))

# S-scores ----------------------------------------

# From Haege (2017): http://dx.doi.org/10.7910/DVN/ALVXLM
# Use UN-based measures: srsvvs_us, srsvva_us, kappavv_us, pivv_us
fpsim <- rio::import("Data/Source/Foreign policy similarity/fpsim_us.csv")
fpsim <- fpsim[, c("ccode", "year", "srsvvs_us", "srsvva_us", "kappavv_us", "pivv_us")]
names(fpsim) <- c("cowcode", "Year", "srsvvs_us", "srsvva_us", "kappavv_us", "pivv_us")

d9 <- left_join(x = d8, y = fpsim,
                     by = c("pam_cowcode" = "cowcode", "pam_year" = "Year"))

# Ideal point affinity with major donors
# from https://doi.org/10.7910/DVN/LEJUQZ

unga <- rio::import("Data/Source/UNGA voting/IdealpointestimatesAll_Mar2021.csv")

unga <- mutate(group_by(filter(unga, session >= 26), session),
               idp_us = IdealPointAll[ccode == 2],
               idp_france = IdealPointAll[ccode == 220],
               idp_uk = IdealPointAll[ccode == 200],
               idp_china = IdealPointAll[ccode == 710])

unga$diff_us <- abs(unga$IdealPointAll - unga$idp_us)
unga$diff_france <- abs(unga$IdealPointAll - unga$idp_france)
unga$diff_uk <- abs(unga$IdealPointAll - unga$idp_uk)
unga$diff_china <- abs(unga$IdealPointAll - unga$idp_china)
unga$diff_avgoecd <- (unga$diff_us + unga$diff_france + unga$diff_uk) / 3

unga$year <- unga$session + 1945

unga <- dplyr::select(ungroup(unga), ccode, year, IdealPointAll, diff_us, diff_france, diff_uk, diff_china, diff_avgoecd)

# fill in missing obs, but do it for UNGA, then create lags!

unga[unga$ccode == 346 & (unga$year == 1997 | unga$year == 1998), ]$diff_us <- unga[unga$ccode == 346 & unga$year == 1996, ]$diff_us
unga[unga$ccode == 346 & (unga$year == 1997 | unga$year == 1998), ]$diff_france <- unga[unga$ccode == 346 & unga$year == 1996, ]$diff_france
unga[unga$ccode == 346 & (unga$year == 1997 | unga$year == 1998), ]$diff_uk <- unga[unga$ccode == 346 & unga$year == 1996, ]$diff_uk
unga[unga$ccode == 346 & (unga$year == 1997 | unga$year == 1998), ]$diff_china <- unga[unga$ccode == 346 & unga$year == 1996, ]$diff_china
unga[unga$ccode == 346 & (unga$year == 1997 | unga$year == 1998), ]$diff_avgoecd <- unga[unga$ccode == 346 & unga$year == 1996, ]$diff_avgoecd

unga[unga$ccode == 404 & (unga$year == 2000 | unga$year == 2001), ]$diff_us <- unga[unga$ccode == 404 & unga$year == 1999, ]$diff_us
unga[unga$ccode == 404 & (unga$year == 2000 | unga$year == 2001), ]$diff_france <- unga[unga$ccode == 404 & unga$year == 1999, ]$diff_france
unga[unga$ccode == 404 & (unga$year == 2000 | unga$year == 2001), ]$diff_uk <- unga[unga$ccode == 404 & unga$year == 1999, ]$diff_uk
unga[unga$ccode == 404 & (unga$year == 2000 | unga$year == 2001), ]$diff_china <- unga[unga$ccode == 404 & unga$year == 1999, ]$diff_china
unga[unga$ccode == 404 & (unga$year == 2000 | unga$year == 2001), ]$diff_avgoecd <- unga[unga$ccode == 404 & unga$year == 1999, ]$diff_avgoecd

unga[unga$ccode == 436 & unga$year == 1999, ]$diff_us <- unga[unga$ccode == 436 & unga$year == 1998, ]$diff_us
unga[unga$ccode == 436 & unga$year == 1999, ]$diff_france <- unga[unga$ccode == 436 & unga$year == 1998, ]$diff_france
unga[unga$ccode == 436 & unga$year == 1999, ]$diff_uk <- unga[unga$ccode == 436 & unga$year == 1998, ]$diff_uk
unga[unga$ccode == 436 & unga$year == 1999, ]$diff_china <- unga[unga$ccode == 436 & unga$year == 1998, ]$diff_china
unga[unga$ccode == 436 & unga$year == 1999, ]$diff_avgoecd <- unga[unga$ccode == 436 & unga$year == 1998, ]$diff_avgoecd

unga[unga$ccode == 436 & (unga$year == 2001 | unga$year == 2002), ]$diff_us  <- unga[unga$ccode == 436 & unga$year == 2000, ]$diff_us
unga[unga$ccode == 436 & (unga$year == 2001 | unga$year == 2002), ]$diff_france  <- unga[unga$ccode == 436 & unga$year == 2000, ]$diff_france
unga[unga$ccode == 436 & (unga$year == 2001 | unga$year == 2002), ]$diff_uk  <- unga[unga$ccode == 436 & unga$year == 2000, ]$diff_uk
unga[unga$ccode == 436 & (unga$year == 2001 | unga$year == 2002), ]$diff_china  <- unga[unga$ccode == 436 & unga$year == 2000, ]$diff_china
unga[unga$ccode == 436 & (unga$year == 2001 | unga$year == 2002), ]$diff_avgoecd  <- unga[unga$ccode == 436 & unga$year == 2000, ]$diff_avgoecd

unga[unga$ccode == 450 & unga$year == 2003, ]$diff_us <- unga[unga$ccode == 450 & unga$year == 1997, ]$diff_us
unga[unga$ccode == 450 & unga$year == 2003, ]$diff_france <- unga[unga$ccode == 450 & unga$year == 1997, ]$diff_france
unga[unga$ccode == 450 & unga$year == 2003, ]$diff_uk <- unga[unga$ccode == 450 & unga$year == 1997, ]$diff_uk
unga[unga$ccode == 450 & unga$year == 2003, ]$diff_china <- unga[unga$ccode == 450 & unga$year == 1997, ]$diff_china
unga[unga$ccode == 450 & unga$year == 2003, ]$diff_avgoecd <- unga[unga$ccode == 450 & unga$year == 1997, ]$diff_avgoecd

unga[unga$ccode == 702 & unga$year == 2001, ]$diff_us <- unga[unga$ccode == 702 & unga$year == 2000, ]$diff_us
unga[unga$ccode == 702 & unga$year == 2001, ]$diff_france <- unga[unga$ccode == 702 & unga$year == 2000, ]$diff_france
unga[unga$ccode == 702 & unga$year == 2001, ]$diff_uk <- unga[unga$ccode == 702 & unga$year == 2000, ]$diff_uk
unga[unga$ccode == 702 & unga$year == 2001, ]$diff_china <- unga[unga$ccode == 702 & unga$year == 2000, ]$diff_china
unga[unga$ccode == 702 & unga$year == 2001, ]$diff_avgoecd <- unga[unga$ccode == 702 & unga$year == 2000, ]$diff_avgoecd

unga[unga$ccode == 811 & (unga$year == 1997 | unga$year == 1998), ]$diff_us <- unga[unga$ccode == 811 & unga$year == 1996, ]$diff_us
unga[unga$ccode == 811 & (unga$year == 1997 | unga$year == 1998), ]$diff_france <- unga[unga$ccode == 811 & unga$year == 1996, ]$diff_france
unga[unga$ccode == 811 & (unga$year == 1997 | unga$year == 1998), ]$diff_uk <- unga[unga$ccode == 811 & unga$year == 1996, ]$diff_uk
unga[unga$ccode == 811 & (unga$year == 1997 | unga$year == 1998), ]$diff_china <- unga[unga$ccode == 811 & unga$year == 1996, ]$diff_china
unga[unga$ccode == 811 & (unga$year == 1997 | unga$year == 1998), ]$diff_avgoecd <- unga[unga$ccode == 811 & unga$year == 1996, ]$diff_avgoecd

unga <- mutate(group_by(unga, ccode),
               diff_us_lag = lag(diff_us),
               diff_france_lag = lag(diff_france),
               diff_uk_lag = lag(diff_uk),
               diff_china_lag = lag(diff_china),
               diff_avgoecd_lag = lag(diff_avgoecd))

names(unga) <- c("ccode", "year", "IdealPointAll", "unga_diff_us", "unga_diff_france", 
                 "unga_diff_uk", "unga_diff_china", "unga_diff_avgoecd", "unga_diff_us_lag", "unga_diff_france_lag", 
                 "unga_diff_uk_lag", "unga_diff_china_lag", "unga_diff_avgoecd_lag")

d10 <- left_join(x = d9, y = unga,
                 by = c("pam_cowcode" = "ccode", "pam_year" = "year"))


# Resources (Lujala 2010 JPR) ---------------------------------------------

res <- rio::import("Data/Source/Lujala - Resources/Lujala Spoils of Nature data ONSET.dta")
res <- dplyr::select(res,
              ccode, year, 
              allgemsP, drugs, oilp)

# Carry forward 2003 values

sp <- countrycode::codelist_panel
sp <- filter(sp, year >= 2003 & year <= 2015 & !is.na(cown))

sp_2003 <- expand.grid(unique(sp[sp$year == 2003, ]$cown), c(2004:2015))
names(sp_2003) <- c("cown", "year")
sp_2003 <- arrange(sp_2003, cown, year)

res_2003 <- res[res$year == 2003, ]

res2 <- left_join(x = sp_2003, y = res_2003[, -2], by = c("cown" = "ccode"))
names(res2) <- c("ccode", "year", "allgemsP", "drugs", "oilp")

res3 <- rbind(res, res2)

res4 <- arrange(res3, ccode, year)

res4$gems_drugs_oil <- ifelse(res4$allgemsP == 1 | res4$drugs == 1 | res4$oilp == 1, 1, 0)

# because I skipped prior steps due to new sourcex
# d10 <- d9

d11 <- left_join(x = d10, y = res4,
                          by = c("pam_cowcode" = "ccode", "pam_year" = "year"))

# d12 <- left_join(x = d11, y = bq2_nodups,
#                           by = c("pam_cowcode" = "cowcode", "pam_year" = "Year"))

# because I skipped prior steps due to new sourcex
d12 <- d11

# some country fixes -----------------------------------------------

d12$democracy6_lag <- ifelse(d12$polity2_P4_lag1 >= 6, 1, 0)

d12[d12$pam_cowcode == 346, ]$democracy6_lag <- 0

d12[d12$pam_cowcode == 660, ]$democracy6_lag <- 0

d12$pop_WDI_PW_last3_lag1_log <- log(d12$pop_WDI_PW_last3_lag1)

d12[d12$pam_caseid == 23, ]$ucdp_CumulativeIntensity <- 0

# Female participation in CPA negotiation from Krause et al. (https://doi.org/10.7910/DVN/LNMEXL) -----------------------

fpkkb <- rio::import("Data/Source/Krause/Copy of PAM_ID%20V.1.5%20Updated%2029JULY2015-3.csv")
fpkkb <- select(fpkkb, pam_caseid, year_count, `Fem. Signatories`)
names(fpkkb) <- c("pam_caseid", "year_count", "pam_FemSignatories")

d13 <- left_join(x = d12, y = fpkkb, 
                 by = c("pam_caseid" = "pam_caseid", "pam_year_count" = "year_count"))

d13$pam_FemSignatories <- ifelse(is.na(d13$pam_FemSignatories) == TRUE, 0, d13$pam_FemSignatories)

# Prepare data for analysis; recoding etc. --------------------------------

d <- d13

# Verify the right conflicts are associated with the right CPAs

View(arrange(d[, c("cowcode", "Year", "pam_cowcode", "pam_year", "ID_ucdp_year", "mediation_cumsum", "pam_caseid", "ucdp_IntensityLevel")], ID_ucdp_year))

# Note that for some reason (?) South Africa late 80s is missing from the conflict data, even though it is in the ACD (new ID 345, ended in 1988)
# fixed below!

# Time and region indicators
d$year_group <- as.numeric(cut(d$pam_year, breaks = c(1989, 1991, 1995, 2000, 2005, 2010), include.lowest = TRUE))
d$continent <- countrycode::countrycode(sourcevar = d$pam_cowcode, 
                                        origin = "cown",
                                        destination = "continent")
d$continent <- ifelse(d$pam_cowcode == 345, "Europe", d$continent)
d$continent <- ifelse(d$pam_cowcode == 678, "Asia", d$continent)
d$continent <- as.numeric(as.factor(d$continent))

# Fixes from data inspection

# mediation_cumsum_log_lag
# South Africa - UCDP/PRIO old ID 150, new ID 345
# -> code as log(1) one mediation year per CWM (1983)
d[d$pam_caseid == 23, ]$mediation_cumsum <- 1
d[d$pam_caseid == 23, ]$mediation_cumsum_lag <- 1
d[d$pam_caseid == 23, ]$mediation_cumsum_log <- log(1 + 1)
d[d$pam_caseid == 23, ]$mediation_cumsum_log_lag <- log(1 + 1)

# bdbest_cumsum
# South Africa
# -> fill in log(4012) from Lacina & Gleditsch (2005)
d[d$pam_caseid == 23, ]$bdbest_cumsum <- 4012
# Niger
# -> fill in log(400) from Lacina & Gleditsch (2005)
d[d$pam_caseid == 10, ]$bdbest_cumsum <- 400

# Intensity
d[d$pam_caseid == 23, ]$ucdp_CumulativeIntensity <- 0
d[d$pam_caseid == 23, ]$ucdp_IntensityLevel <- 0
d[d$pam_caseid == 23, ]$ucdp_IntensityLevel_lag <- 0

# PKO
# South Africa
d[d$pam_caseid == 23, ]$UNPKO_yes <- 0
d[d$pam_caseid == 23, ]$UNPKO_yes_lag <- 0
d[d$pam_caseid == 23, ]$UNPKO_yes_preCPA <- 0

# d$bdbest_cumsum_log <- log(d$bdbest_cumsum + 1)
d$pam_warlength_days_log <- log(as.numeric(d$pam_warlength_days))

d[d$pam_caseid == 23, ]$cowcode <- d[d$pam_caseid == 23, ]$pam_cowcode
d[d$pam_caseid == 23, ]$Year <- d[d$pam_caseid == 23, ]$pam_year

# verify that South Africa is now included
View(arrange(d[, c("cowcode", "Year", "pam_cowcode", "pam_year", "ID_ucdp_year", "mediation_cumsum", "pam_caseid", "ucdp_IntensityLevel")], ID_ucdp_year))

# Rescaling variables -----------------------------------------------------

# remove .s in variable names
names(d) <- gsub(pattern = "\\.", replace = "\\_", x = names(d))
# shorten long varnames
names(d) <- gsub(pattern = "aiddata", replace = "aidd", x = names(d))
names(d) <- gsub(pattern = "commitment", replace = "comm", x = names(d))
names(d) <- gsub(pattern = "oecd", replace = "o", x = names(d))
names(d) <- gsub(pattern = "UNCTAD", replace = "UN", x = names(d))
names(d) <- gsub(pattern = "fdistocks", replace = "fdist", x = names(d))
names(d) <- gsub(pattern = "fdiflows", replace = "fdifl", x = names(d))
names(d) <- gsub(pattern = "totoffflows", replace = "totof", x = names(d))
names(d) <- gsub(pattern = "totodflows", replace = "totodflo", x = names(d))


# # standardize continuous variables --------------------------------------

# note that I already did this in sourcex, so add a "sp" suffix for "standardized by PAM" ...
d <- d %>%
  ungroup() %>%
  mutate(across(c(exports_GDP_ihst_last3_lag1, impexp_GDP_ihst_last3_lag1, impexp_p5_GDP_ihst_last3_lag1, impexp_p7_GDP_ihst_last3_lag1,
                  aidd_comm_GDP_ihst_last3_lag1,
                  o_comm_GDP_ihst_last3_lag1, o_aidtotal_GDP_ihst_last3_lag1, o_totof_GDP_ihst_last3_lag1, o_totodflo_GDP_ihst_last3_lag1, 
                  us_econ_GDP_ihst_last3_lag1, us_mil_GDP_ihst_last3_lag1,
                  DT_NFL_MIDA_CD_GDP_last3_lag1, DT_DOD_DIMF_CD_GDP_last3_lag1, DT_DOD_MWBG_CD_GDP_last3_lag1, 
                  fdist_UN_GDP_ihst_last3_lag1, fdifl_UN_GDP_ihst_last3_lag1,
                  exports_last3_lag1, impexp_last3_lag1, impexp_p5_last3_lag1, impexp_p7_last3_lag1,
                  aidd_comm_last3_lag1,
                  aidd_comm_us, aidd_comm_france, aidd_comm_uk,
                  o_comm_last3_lag1, o_aidtotal_last3_lag1, o_totof_last3_lag1, o_totodflo_last3_lag1, 
                  us_econ_last3_lag1, us_mil_lag1,
                  DT_NFL_MIDA_CD_last3_lag1, DT_DOD_DIMF_CD_last3_lag1, DT_DOD_MWBG_CD_last3_lag1, DT_NFL_IMFC_CD_last3_lag1,
                  fdist_UN_last3_lag1, fdifl_UN_last3_lag1,
                  hligo_lag, non_hligo_lag, hsigo_lag, non_hsigo_lag, hsigo_cb_lag, non_cb_hsigo_lag, hsigo_region_lag, hsigo_cb_region_lag, hligo_region_lag,
                  aidIV_frac1_last3_lag1, aidIV_frac2_last3_lag1, aidIV_frac3_last3_lag1, aidIV_frac4_last3_lag1),
                list("sp" = ~ . / (2*sd(., na.rm = TRUE)))))

meanstd <- function(x){
  (x - mean(x, na.rm = TRUE)) / (2 * sd(x, na.rm = TRUE))
}

d$bdbest_cumsum_log <- log(d$bdbest_cumsum + 1)
d$exclpop_std <- meanstd(d$exclpop)
d$conflictyears_log <- log(d$conflictyears + 1)
d$srsvvs_us_std <- meanstd(d$srsvvs_us)
d$srsvva_us_std <- meanstd(d$srsvva_us)
d$kappavv_us_std <- meanstd(d$kappavv_us)
d$pivv_us_std <- meanstd(d$pivv_us)
d$unga_diff_avgo_std <- meanstd(d$unga_diff_avgo)
d$unga_diff_avgo_lag_std <- meanstd(d$unga_diff_avgo_lag)
d$unga_diff_china_std <- meanstd(d$unga_diff_china)
d$unga_diff_china_lag_std <- meanstd(d$unga_diff_china_lag)
d$unga_diff_us_std <- meanstd(d$unga_diff_us)
d$unga_diff_us_lag_std <- meanstd(d$unga_diff_us_lag)

d$territorial_conflict <- ifelse(d$ucdp_Incompatibility_code == 1, 1, 0)
d$territorial_conflict <- ifelse(is.na(d$territorial_conflict) == TRUE, 0, d$territorial_conflict)

# Center etc. (for Bell & Jones REWB regressions) ----------------------------------

# 1. Generate country(case)-means, based on the original data before dropping observations with missing values

# Specific

# d <- mutate(group_by(d, pam_caseid),
#             exports_GDP_ihst_last3_lag1_b = mean(exports_GDP_ihst_last3_lag1_sp, na.rm = TRUE))

# generalized
d <- group_by(d, pam_caseid) %>%
  mutate(across(c(exports_GDP_ihst_last3_lag1_sp, impexp_GDP_ihst_last3_lag1_sp, impexp_p5_GDP_ihst_last3_lag1_sp, impexp_p7_GDP_ihst_last3_lag1_sp,
                  aidd_comm_GDP_ihst_last3_lag1_sp,
                  o_comm_GDP_ihst_last3_lag1_sp, o_aidtotal_GDP_ihst_last3_lag1_sp, o_totof_GDP_ihst_last3_lag1_sp, o_totodflo_GDP_ihst_last3_lag1_sp, 
                  us_econ_GDP_ihst_last3_lag1_sp, us_mil_GDP_ihst_last3_lag1_sp,
                  DT_NFL_MIDA_CD_GDP_last3_lag1_sp, DT_DOD_DIMF_CD_GDP_last3_lag1_sp, DT_DOD_MWBG_CD_GDP_last3_lag1_sp, 
                  fdist_UN_GDP_ihst_last3_lag1_sp, fdifl_UN_GDP_ihst_last3_lag1_sp,
                  exports_last3_lag1_sp, impexp_last3_lag1_sp, impexp_p5_last3_lag1_sp, impexp_p7_last3_lag1_sp,
                  aidd_comm_last3_lag1_sp,
                  o_comm_last3_lag1_sp, o_aidtotal_last3_lag1_sp, o_totof_last3_lag1_sp, o_totodflo_last3_lag1_sp, 
                  us_econ_last3_lag1_sp, us_mil_lag1_sp,
                  DT_NFL_MIDA_CD_last3_lag1_sp, DT_DOD_DIMF_CD_last3_lag1_sp, DT_DOD_MWBG_CD_last3_lag1_sp, DT_NFL_IMFC_CD_last3_lag1_sp,
                  fdist_UN_last3_lag1_sp, fdifl_UN_last3_lag1_sp,
                  hligo_lag_sp, non_hligo_lag_sp, hsigo_lag_sp, non_hsigo_lag_sp, hsigo_cb_lag_sp, non_cb_hsigo_lag_sp,
                  pam_warlength_days_log, bdbest_cumsum_log,
                  mediation_cumsum_log,
                  pop_WDI_PW_last3_lag1_log, growth_WDI_PW_last3_lag1, maxexclpop_EP_last3_lag1,
                  gdp_WDI_PW_last3_lag1, 
                  gdppc_WDI_PW_last3_lag1, gdppc_WDI_PW_ln_last3_lag1),
                list("b" = ~ mean(., na.rm = TRUE))))

# 2. Generate within variables ($x_{ij}-\bar{x}_j$):

# specific
# d <- mutate(group_by(d, pam_caseid),
#             pam_year_count_w = pam_year_count - mean(pam_year_count),
#             exports_GDP_ihst_last3_lag1_w = exports_GDP_ihst_last3_lag1_sp - mean(exports_GDP_ihst_last3_lag1_sp))

# generalized
d <- group_by(d, pam_caseid) %>%
  mutate(across(c(exports_GDP_ihst_last3_lag1_sp, impexp_GDP_ihst_last3_lag1_sp, impexp_p5_GDP_ihst_last3_lag1_sp, impexp_p7_GDP_ihst_last3_lag1_sp,
                  aidd_comm_GDP_ihst_last3_lag1_sp,
                  o_comm_GDP_ihst_last3_lag1_sp, o_aidtotal_GDP_ihst_last3_lag1_sp, o_totof_GDP_ihst_last3_lag1_sp, o_totodflo_GDP_ihst_last3_lag1_sp, 
                  us_econ_GDP_ihst_last3_lag1_sp, us_mil_GDP_ihst_last3_lag1_sp,
                  DT_NFL_MIDA_CD_GDP_last3_lag1_sp, DT_DOD_DIMF_CD_GDP_last3_lag1_sp, DT_DOD_MWBG_CD_GDP_last3_lag1_sp, 
                  fdist_UN_GDP_ihst_last3_lag1_sp, fdifl_UN_GDP_ihst_last3_lag1_sp,
                  exports_last3_lag1_sp, impexp_last3_lag1_sp, impexp_p5_last3_lag1_sp, impexp_p7_last3_lag1_sp,
                  aidd_comm_last3_lag1_sp,
                  o_comm_last3_lag1_sp, o_aidtotal_last3_lag1_sp, o_totof_last3_lag1_sp, o_totodflo_last3_lag1_sp, 
                  us_econ_last3_lag1_sp, us_mil_lag1_sp,
                  DT_NFL_MIDA_CD_last3_lag1_sp, DT_DOD_DIMF_CD_last3_lag1_sp, DT_DOD_MWBG_CD_last3_lag1_sp, DT_NFL_IMFC_CD_last3_lag1_sp,
                  fdist_UN_last3_lag1_sp, fdifl_UN_last3_lag1_sp,
                  hligo_lag_sp, non_hligo_lag_sp, hsigo_lag_sp, non_hsigo_lag_sp, hsigo_cb_lag_sp, non_cb_hsigo_lag_sp,
                  pam_warlength_days_log, bdbest_cumsum_log,
                  mediation_cumsum_log,
                  pop_WDI_PW_last3_lag1_log, growth_WDI_PW_last3_lag1, maxexclpop_EP_last3_lag1,
                  gdp_WDI_PW_last3_lag1, 
                  gdppc_WDI_PW_last3_lag1, gdppc_WDI_PW_ln_last3_lag1),
                list("w" = ~ . - mean(., na.rm = TRUE))))

# 3. Ready for the REWB model: y_{ij} = \beta_0 + \beta_1(x_{ij}-\bar{x}_j) + \beta_4 \bar{x}_j + \beta_2 z_j + (u_j + \epsilon_{ij})

# Export data -------------------------------------------------------------

rio::export(d, file = "Data/Work/implementation_allyears.csv")
saveRDS(d, file = "Data/Work/implementation_allyears.rds")
